####
#    This syntax replicates the results of the paper
#    Gr�mping, Max, and Darren R. Halpin. 2019. "Does group engagement with members constitute a "beneficial inefficiency"?",
#    in: Governance, doi:10.1111/gove.12388.
#
#    It requires the file 'dat.RData' in the working directory
# 
####



rm(list=ls())
options(scipen=999)

if (!require("pacman")) install.packages("pacman")
pacman::p_load("cba")
pacman::p_load("klaR")
pacman::p_load("cluster")
pacman::p_load("fpc")
pacman::p_load("factoextra")
pacman::p_load("glmmTMB")
pacman::p_load("lme4")
pacman::p_load("pscl")
pacman::p_load("psych")
pacman::p_load("polycor")
pacman::p_load("stargazer")
pacman::p_load("dendextend")
pacman::p_load("Hmisc")
pacman::p_load("dendextend")
pacman::p_load("plyr")


#Set WD
setwd("YOUR WORKING DIRECTORY HERE\\")


# LOAD DATA ----------------------------------------------------------

load("dat.RData")


# codebook
# q13   = membership of organization ('organisations', 'individuals', 'associations', 'mixture', 'no members')
# q11   = structure of organization ('federated', 'unitary', 'online')
# q28   = involvement of members ('not', 'somewhat', 'very')
# v216  = how important is it to be visible to organization's members ('not at all important', 'slightly important', 'important', 'fairly important', 'very important')
# expanded  = has org expaned opportunities for members to participate and/or added branches ('none', 'one' 'both')
# citizen_dummy = 1=citizen group, 0=otherwise
# age = age of organization (years)
# q14 = staff
# oral_tot  = access (invitations to speak)
# writt_tot = access (written submissions)
# log_staff = log(1 + q14)
# poldomains = Breadth of policy engagement (number of policy domains)



# LOAD FUNCTIONS ----------------------------------------------------------


# variance inflation factor (VIF) for lme method  (tests for multicolinearity)
vif.lme <- function (fit) {
  ## adapted from rms::vif
  v <- vcov(fit)
  nam <- names(fixef(fit))
  ## exclude intercepts
  ns <- sum(1 * (nam == "Intercept" | nam == "(Intercept)"))
  if (ns > 0) {
    v <- v[-(1:ns), -(1:ns), drop = FALSE]
    nam <- nam[-(1:ns)] }
  d <- diag(v)^0.5
  v <- diag(solve(v/(d %o% d)))
  names(v) <- nam
  v }




# Function that returns Root Mean Squared Error
rmse <- function(error)
{
  sqrt(mean(error^2))
}








multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  require(grid)
  
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots == 1) {
    print(plots[[1]])
    
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}



# PREPARE DATA ------------------------------------------------------------

 

# Turn variables into ordinal  
dat$expanded.ord <- ordered(dat$expanded, levels = c("none", "one", "both"))
dat$q13.ord <- ordered(dat$q13, levels = c("no members", "individuals", "mixture", "organisations", "associations" ))
dat$q11.ord <- ordered(dat$q11, levels = c("online", "unitary", "federated"))
dat$q28.ord <- ordered(dat$q28, levels = c("not", "somewhat", "very"))
dat$v216.ord <- ordered(dat$v216, levels = c("not at all important", "slightly important", "important",  "fairly important", "very important" ))

# Turn variables into interval  
dat$expanded.num <- as.numeric(dat$expanded.ord)
dat$q13.num <- as.numeric(dat$q13.ord)
dat$q11.num<- as.numeric(dat$q11.ord)
dat$q28.num<- as.numeric(dat$q28.ord)
dat$v216.num <- as.numeric(dat$v216.ord)


# additive inefficiency index
dat$ineff3 <- (dat$q13.num /5 + dat$q11.num/3 + dat$q28.num/3 + dat$expanded.num/3 + dat$v216.num /5)/5





# CLUSTER ANALYSIS ------------------------------------------

### categorical
cat <- dat[c("q13",
             "q11",
             "q28",
             "expanded", 
             "v216", 
             "ineff3"
)]
cat <- cat[complete.cases(cat),]

# Dissimilarity matrix 
distm <- daisy(cat[,1:5], metric = "gower")



# FIGURE A1 ---------------------------------------------------------------

# Hierarchical agglomerative clustering (HAC)  DIFFERENT METHODS
hac1 <- agnes(distm, diss = TRUE, method = "ward")
haca <- agnes(distm, diss = TRUE, method = "average")
hacb <- agnes(distm, diss = TRUE, method = "single")
hacc <- agnes(distm, diss = TRUE, method = "complete")
hacd <- agnes(distm, diss = TRUE, method = "weighted")

g1 <- fviz_dend(hac1, 
                show_labels = F,
                horiz=T,
                type="rectangle",
                main = "Ward",
                ggtheme = theme_minimal()
)

g2 <- fviz_dend(haca, 
                show_labels = F,
                horiz=T,
                type="rectangle",
                main = "Average",
                ggtheme = theme_minimal()
)

g3 <- fviz_dend(hacb, 
                show_labels = F,
                horiz=T,
                type="rectangle",
                main = "Single linkage",
                ggtheme = theme_minimal()
)

g4 <- fviz_dend(hacc, 
                show_labels = F,
                horiz=T,
                type="rectangle",
                main = "Complete linkage",
                ggtheme = theme_minimal()
)

g5 <- fviz_dend(hacd, 
                show_labels = F,
                horiz=T,
                type="rectangle",
                main = "Weighted average",
                ggtheme = theme_minimal()
)

multiplot(g1, g3, g2, g4, g5, cols = 2)
ggsave(plot = multiplot(g1, g3, g2, g4, g5, cols = 2), filename = "Fig A1.jpg", width = 9, height = 10, dpi = 600, units = "in")

rm(g1)
rm(g2)
rm(g3)
rm(g4)
rm(g5)





# TABLE A4 ----------------------------------------------------------------

# Create two dendrograms
dend1 <- as.dendrogram (hac1)
dend2 <- as.dendrogram (haca)
dend3 <- as.dendrogram (hacb)
dend4 <- as.dendrogram (hacc)
dend5 <- as.dendrogram (hacd)

dend_list <- dendlist("Ward" = dend1, "Average" = dend2,
                      "Single" = dend3, "Complete" = dend4, "Weighted average" = dend5)

# Cophenetic correlation matrix
cors <- cor.dendlist(dend_list, method = "cophenetic")
cortable <- as.data.frame(round(cors, 2))

write.csv(cortable, "Table A4.csv")

rm(dend1)
rm(dend2)
rm(dend3)
rm(dend4)
rm(dend5)
rm(cors)
rm(dend_list)
rm(cortable)



# FIGURE A2  -----------------------------------------------

pacman::p_load(clValid)
pacman::p_load(clusterSim)

# bind all relevant stats together
stats_2 <- as.data.frame(cluster.stats(distm,stats::cutree(hac1,k=2),  G2 = T, G3 = T)[c("avg.silwidth", "g2", "g3", "dunn","dunn2")])
stats_3 <- as.data.frame(cluster.stats(distm,stats::cutree(hac1,k=3),  G2 = T, G3 = T)[c("avg.silwidth", "g2", "g3", "dunn","dunn2")])
stats_4 <- as.data.frame(cluster.stats(distm,stats::cutree(hac1,k=4),  G2 = T, G3 = T)[c("avg.silwidth", "g2", "g3", "dunn","dunn2")])
stats_5 <- as.data.frame(cluster.stats(distm,stats::cutree(hac1,k=5),  G2 = T, G3 = T)[c("avg.silwidth", "g2", "g3", "dunn","dunn2")])
stats <- rbind(stats_2, stats_3, stats_4, stats_5)

g1 <- fviz_silhouette(silhouette(stats::cutree(hac1,k=2), distm))
g2 <- fviz_silhouette(silhouette(stats::cutree(hac1,k=3), distm))
g3 <- fviz_silhouette(silhouette(stats::cutree(hac1,k=4), distm))
g4 <- fviz_silhouette(silhouette(stats::cutree(hac1,k=5), distm))

multiplot(g1, g3, g2, g4, cols = 2)
ggsave(plot = multiplot(g1, g3, g2, g4, cols = 2), filename = "Fig A2.jpg", width = 9, height = 9, dpi = 600, units = "in")


rm(g1)
rm(g2)
rm(g3)
rm(g4)
rm(stats)
rm(stats_2)
rm(stats_3)
rm(stats_4)
rm(stats_5)




# FIGURE A3 - FINAL CLUSTERING -----------------------------------------------------

# according to above criteria, select Ward method, 3 clusters

clust_final <- stats::cutree(hac1,k=3)

# add cluster to data
dat <- dat[complete.cases(dat[c("q13",
                                "q11",
                                "q28",
                                "expanded", 
                                "v216",
                                "ineff3"
)]),]
dat <- cbind(dat,clust_final)


# final dendogram
fviz_dend(hac1, k = 4, 
          cex = 0.01, 
          k_colors = c("#5fa35c", "#e1a65c", "#472d13"),
          color_labels_by_k = TRUE, 
          rect = TRUE, 
          horiz=T,
          type="rectangle",
          ggtheme = theme_minimal()
)



ggsave(filename = "Fig A3.jpg", width = 9, height = 9, dpi = 600, units = "in")



rm(hac1)
rm(hacb)
rm(hacc)
rm(hacd)
rm(haca)
rm(distm)




# TABLE 1 -------------------------------------------------------

dat$q13 <- droplevels(dat$q13)
dat$v216 <- droplevels(dat$v216)

round(prop.table(xtabs(~ q13 + clust_final, data=dat), c(2))*100) # membership type
round(prop.table(xtabs(~ q11 + clust_final, data=dat), c(2))*100) # structure
round(prop.table(xtabs(~ q28 + clust_final, data=dat), c(2))*100) # involvement of members
round(prop.table(xtabs(~ expanded + clust_final, data=dat), c(2))*100)  # Expanded opps to participate
round(prop.table(xtabs(~ v216 + clust_final, data=dat), c(2))*100) # Importance of being visible to members


# test of equality of a variable's distribution among the four clusters; chi-square for categorical variables
vcd::assocstats(xtabs(~ q13 + clust_final, data=dat))
vcd::assocstats(xtabs(~ q11 + clust_final, data=dat))
vcd::assocstats(xtabs(~ q28 + clust_final, data=dat))
vcd::assocstats(xtabs(~ expanded + clust_final, data=dat))
vcd::assocstats(xtabs(~ v216 + clust_final, data=dat))

# test of equality of a variable's distribution within a cluster versus the variable's overall distribution
chisq.test(cbind(table(dat$q13), table(dat$q13[dat$clust_final==1])))
chisq.test(cbind(table(dat$q13), table(dat$q13[dat$clust_final==2])))
chisq.test(cbind(table(dat$q13), table(dat$q13[dat$clust_final==3])))

chisq.test(cbind(table(dat$q11), table(dat$q11[dat$clust_final==1])))
chisq.test(cbind(table(dat$q11), table(dat$q11[dat$clust_final==2])))
chisq.test(cbind(table(dat$q11), table(dat$q11[dat$clust_final==3])))

chisq.test(cbind(table(dat$q28), table(dat$q28[dat$clust_final==1])))
chisq.test(cbind(table(dat$q28), table(dat$q28[dat$clust_final==2])))
chisq.test(cbind(table(dat$q28), table(dat$q28[dat$clust_final==3])))

chisq.test(cbind(table(dat$expanded), table(dat$expanded[dat$clust_final==1])))
chisq.test(cbind(table(dat$expanded), table(dat$expanded[dat$clust_final==2])))
chisq.test(cbind(table(dat$expanded), table(dat$expanded[dat$clust_final==3])))

chisq.test(cbind(table(dat$v216), table(dat$v216[dat$clust_final==1])))
chisq.test(cbind(table(dat$v216), table(dat$v216[dat$clust_final==2])))
chisq.test(cbind(table(dat$v216), table(dat$v216[dat$clust_final==3])))




# TABLE 2 -----


ddply(dat, c("clust_final"), summarise, mean = mean(age, na.rm=T), sd = sqrt(var(age, na.rm=T)), ci = ((sqrt(var(age, na.rm=T))) / (sqrt(length(age))))*1.96)
ddply(dat, c("clust_final"), summarise, mean = mean(q14, na.rm=T), sd = sqrt(var(q14, na.rm=T)), ci = ((sqrt(var(q14, na.rm=T))) / (sqrt(length(q14))))*1.96)

vcd::assocstats(xtabs(~ citizen_dummy + clust_final, data=dat))

chisq.test(table(dat$citizen_dummy, dat$clust_final))

gmodels::CrossTable(dat$citizen_dummy, dat$clust_final, expected=T, prop.r=F, prop.c=T,
                    prop.t=F, prop.chisq=F, chisq = T)



# FIGURE 1 ------------------------------------------------------------

ggplot(dat, aes(oral_tot)) +
  stat_ecdf(geom = "line", pad = F) +
  xlab("Access (count)") +
  ylab("Percent of groups") +
  scale_y_continuous(label=scales::percent) +
  scale_x_continuous(breaks = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +
    theme_bw()

ggsave(filename = "Fig 1.jpg", width = 3, height = 3, dpi = 600, units = "in")#unlink("Fig 1.jpg")








# EXPLORATORY FACTOR ANALYSIS ---------------------------------------------

### categorical
cat <- dat[c("q13",
             "q11",
             "q28",
             "expanded", 
             "v216", 
             "ineff3"
)]
cat <- cat[complete.cases(cat),]

pc <- hetcor(cat[,1:5], ML=TRUE) 
faPC <- fa(r=pc$correlations, nfactors=1, n.obs=328, rotate="varimax")
scores <- psych::factor.scores(data.matrix(cat[,1:5]), faPC)
cat <- cbind(cat, scores$scores)
dat <- cbind(dat, cat$MR1 )
colnames(dat)[26] <- "MR1"

# correlate factor scores with additive index and test alpha
cor.test(cat$MR1, dat$ineff3)    #0.7628
psych::alpha(dat[,c("q13.num","q11.num","q28.num","expanded.num","v216.num")])    #0.44 

rm(cat)



# MODELS ------------------------------------------------------------------

dat$clust_final <- factor(dat$clust_final)
dat$clust_final <- relevel(dat$clust_final, ref = 3)

# multicolinearity
newdata <- dat[c("oral_tot",
                 "clust_final",
                 "citizen_dummy",
                 "age",
                 "log_staff",
                 "poldomains")]
usdm::vif(newdata)
rm(newdata)

# 1) DV: oral_tot, different models
form <- oral_tot ~ clust_final + citizen_dummy + age + log_staff + poldomains
pm1.1 = glm(form, data=dat, family=poisson(link = "log"))
nbm1.1 = glm.nb(form, data=dat)
zinbm1.1 = zeroinfl(form, dat)
hpm1.1 = hurdle(form, dat, dist="poisson", link="logit")
hnbm1.1 = hurdle(form, dat, dist="negbin", link="logit")
bbmle::AICtab(pm1.1, nbm1.1, zinbm1.1, hpm1.1, hnbm1.1)

# 2) DV: oral_tot, w interaction term
form <- oral_tot ~ clust_final*log_staff + citizen_dummy + age + poldomains
pm1.int = glm(form, data=dat, family=poisson(link = "log"))
nbm1.int = glm.nb(form, data=dat)
zinbm1.int = zeroinfl(form, dat)
hpm1.int = hurdle(form, dat, dist="poisson", link="logit")
hnbm1.int = hurdle(form, dat, dist="negbin", link="logit")

# 3) DV: writt_tot
form <- writt_tot ~ clust_final + citizen_dummy + age + log_staff + poldomains
pm2.1 = glm(form, data=dat, family=poisson(link = "log"))
nbm2.1 = glm.nb(form, data=dat)
zinbm2.1 = zeroinfl(form, dat)
hpm2.1 = hurdle(form, dat, dist="poisson", link="logit")
hnbm2.1 = hurdle(form, dat, dist="negbin", link="logit")
bbmle::AICtab(pm2.1, nbm2.1, zinbm2.1, hpm2.1, hnbm2.1)

# 4) model including inefficiency index
form <- writt_tot ~ ineff3 + citizen_dummy + age + log_staff + poldomains
nbm3.1 = glm.nb(form, data=dat)

# 5) model including result of PCA
form <- writt_tot ~ MR1 + citizen_dummy + age + log_staff + poldomains
nbm4.1 = glm.nb(form, data=dat)





# TABLE 3 -----------------------------------------------------------------

rcompanion::nagelkerke(nbm1.1)
rcompanion::nagelkerke(nbm1.int)

texreg::htmlreg((list(nbm1.1, nbm1.int)),
                single.row=TRUE, bold = 0.05,stars = c(0.001, 0.01, 0.05), leading.zero = F,
                inline.css = FALSE, doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE,
                file="Table 3.doc")






# FIGURE 2 ----------------------------------------------------------------

newdata1 <- expand.grid(citizen_dummy = c(0),
                        age = c(34),
                        poldomains = 3,
                        log_staff = c(0,0.25,0.5,0.75,1,1.25,1.5,1.75,2,2.25,2.5,2.75,3,3.25,3.5,3.75,4),
                        clust_final = factor(c(3,1,2)))
newdata3 <- cbind(newdata1, predict(nbm1.int, newdata = newdata1, type = "response", se.fit = TRUE))
newdata3$LL <- (newdata3$fit - (1.64  * newdata3$se.fit))
newdata3$UL <- (newdata3$fit + (1.64  * newdata3$se.fit))

newdata3 <- newdata3[as.character(newdata3$clust_final)!=1,]



ggplot(newdata3, aes(x=log_staff, y=fit, ymin=LL, ymax= UL, shape=factor(clust_final))) +
  geom_point(aes(shape = factor(clust_final))) +
  geom_line(data=newdata3, aes(linetype = factor(clust_final))) +
  geom_ribbon(aes(ymin=LL,ymax=UL), alpha=0.1) +
  scale_linetype_discrete(name  ="Organizational\ninefficiency\n", breaks=c("2", "3"), labels=c("High\n(Federated\nMember Groups)\n", "Low\n(Staff-Directed)"))+
  scale_shape_discrete(name  ="Organizational\ninefficiency\n", breaks=c("2", "3"), labels=c("High\n(Federated\nMember Groups)\n", "Low\n(Staff-Directed)"))+
  labs(x = "Resources (logged staff)", y = "Access (predicted count of invitations)") +
  coord_cartesian(ylim = c(0,2)) +
  theme_bw() 

ggsave(filename = "Fig 2.jpg", width = 6, height = 4, dpi = 600, units = "in")






# TABLE A5 ----------------------------------------------------------------

rcompanion::nagelkerke(pm1.1)
rcompanion::nagelkerke(zinbm1.1)
rcompanion::nagelkerke(hpm1.1)
rcompanion::nagelkerke(hnbm1.1)
rcompanion::nagelkerke(nbm1.1)

newdata <- dat[c("oral_tot",
              "clust_final",
              "citizen_dummy",
              "age",
              "log_staff",
              "poldomains")]
newdata <- newdata[complete.cases(newdata),]

pred <- predict(pm1.1, newdata, se.fit=T)
pred$pm1.1<- pred$fit
newdata <- cbind(newdata, pred[4])

pred <- predict(nbm1.1, newdata, se.fit=T)
pred$nbm1.1 <- pred$fit
newdata <- cbind(newdata, pred[4])

pred <- predict(zinbm1.1, newdata, se.fit=T)
newdata$zinbm1.1 <- pred

pred <- predict(hpm1.1, newdata, se.fit=T)
newdata$hpm1.1 <- pred

pred <- predict(hnbm1.1, newdata, se.fit=T)
newdata$hnbm1.1 <- pred


rmse(newdata$oral_tot-newdata$pm1.1)  #  root mean squared error  # 22.385
rmse(newdata$oral_tot-newdata$hpm1.1)  #  root mean squared error  # 22.376
rmse(newdata$oral_tot-newdata$zinbm1.1)  #  root mean squared error  # 22.328
rmse(newdata$oral_tot-newdata$hnbm1.1)  #  root mean squared error  # 22.374
rmse(newdata$oral_tot-newdata$nbm1.1)  #  root mean squared error  # 22.374

texreg::htmlreg((list(pm1.1, hpm1.1, zinbm1.1, hnbm1.1, nbm1.1)),
        single.row=TRUE, bold = 0.05,stars = c(0.001, 0.01, 0.05), leading.zero = F,
        inline.css = FALSE, doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE,
        file="Table A5.doc")





# TABLE A6 ----------------------------------------------------------------

texreg::htmlreg((list(pm1.int, hpm1.int, zinbm1.int, hnbm1.int, nbm1.int)),
                single.row=TRUE, bold = 0.05,stars = c(0.001, 0.01, 0.05, 0.1), leading.zero = F,
                inline.css = FALSE, doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE,
                file="Table A6.doc")



rcompanion::nagelkerke(pm1.int)
rcompanion::nagelkerke(nbm1.int)
rcompanion::nagelkerke(zinbm1.int)
rcompanion::nagelkerke(hpm1.int)
rcompanion::nagelkerke(hnbm1.int)



# TABLE A7 ----------------------------------------------------------------

rcompanion::nagelkerke(pm2.1)
rcompanion::nagelkerke(nbm2.1)
rcompanion::nagelkerke(zinbm2.1)
rcompanion::nagelkerke(hpm2.1)
rcompanion::nagelkerke(hnbm2.1)

texreg::htmlreg((list(pm2.1, hpm2.1, zinbm2.1, hnbm2.1, nbm2.1)),
                single.row=TRUE, bold = 0.05,stars = c(0.001, 0.01, 0.05), leading.zero = F,
                inline.css = FALSE, doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE,
                file="Table A7.doc")
#unlink("Table A7.doc")





# TABLE A8 ----------------------------------------------------------------

rcompanion::nagelkerke(nbm3.1)
rcompanion::nagelkerke(nbm4.1)

texreg::htmlreg((list(nbm3.1, nbm4.1)),
                single.row=TRUE, bold = 0.05,stars = c(0.001, 0.01, 0.05), leading.zero = F,
                inline.css = FALSE, doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE,
                file="Table A8.doc")





# TABLE A2 --------------------------------------------------

desc <- dat[c("oral_tot",
              "clust_final",
              "q11.num",
              "q13.num",
              "q28.num",
              "expanded.num",
              "v216.num",
              "citizen_dummy",
              "age",
              "log_staff",
              "age")]
stargazer(desc, type = "text", out="Table A2.html")

describe(dat$q11.num)
describe(dat$q13.num)
describe(dat$q28.num)
describe(dat$expanded.num)
describe(dat$v216.num)


table(dat$q11)
table(dat$q13)
table(dat$q28)
table(dat$expanded)
table(dat$v216)




# TABLE A3 ----------------------------------------------------------------

write.csv(Hmisc::rcorr(as.matrix(dat[,c("oral_tot", "q11.num","q13.num","q28.num","expanded.num","v216.num", "ineff3", "MR1")]))[1], "Table A3.csv")









